package com.shujia.spark.core

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Demo of the RDD `sample` transformation: reads a text file as an RDD of lines,
 * draws a random sample from it, and prints the sampled lines.
 */
object Demo6Sample {
  /**
   * Builds a local SparkContext, samples lines of `data/students.txt` and prints them.
   *
   * @param args command-line arguments; not used by this demo
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    // NOTE(review): the app name "filter" looks carried over from another demo;
    // left unchanged here because it is a runtime-visible string — confirm and rename.
    conf.setAppName("filter")
    val sc = new SparkContext(conf)

    // Always stop the context, even if the job fails, so its resources are released.
    try {
      val linesRDD: RDD[String] = sc.textFile("data/students.txt")

      /**
       * sample: random sampling.
       * withReplacement = false -> sampling without replacement.
       * fraction = 0.01         -> sampling fraction passed to Spark; no seed is given
       *                            here, so presumably the result varies between runs
       *                            (see the RDD.sample documentation).
       */
      val sampleRDD: RDD[String] = linesRDD.sample(withReplacement = false, fraction = 0.01)

      sampleRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }

}
